import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")
from datetime import datetime
from sklearn.preprocessing import KBinsDiscretizer
import plotly.graph_objects as go
from plotly.offline import plot
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import zscore
from pyecharts.charts import Pie, Bar, Map, WordCloud,Line,Grid,Scatter,Radar,Page
from pyecharts import options as opts
from pyecharts.globals import SymbolType
from pyecharts.globals import ThemeType
from pyecharts.charts import Bar, Line, Grid,Tab
from pyecharts.globals import CurrentConfig, NotebookType
from pyecharts.render import make_snapshot
from pyecharts.commons.utils import JsCode
from pyecharts.charts import Grid, Tab
from bokeh.plotting import figure, show
from bokeh.plotting import *
from bokeh.models import ColumnDataSource
from bokeh.models import Legend
from bokeh.plotting import figure, show, output_file
from bokeh.palettes import *
from bokeh.resources import CDN
from bokeh.palettes import Spectral6
from bokeh.transform import factor_cmap
from bokeh.embed import components
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.embed import file_html
# Load the raw fire-incident export and reduce it to the rows and columns
# used by the rest of the analysis.
df = pd.read_csv('Fire_Incidents.csv')

# select variables that we need
REQUIRED_COLUMNS = [
    'Incident Number', 'Address', 'Incident Date', 'Primary Situation', 'point',
    "Suppression Personnel", 'Action Taken Primary',
    'Item First Ignited', 'Ignition Cause', 'Alarm DtTm',
    'Arrival DtTm', 'Close DtTm', 'Area of Fire Origin',
]

# data cleaning: drop rows with any missing value in the selected columns.
# dropna() returns a new frame, so nothing is mutated through a slice of `df`.
df1 = df[REQUIRED_COLUMNS].dropna()

# Drop rows whose coded fields contain the "-" placeholder.
df2 = df1[df1['Item First Ignited'] != "-"]
df3 = df2[df2['Action Taken Primary'] != "-"]
# Explicit copy: a new column is added to df4 right below.
df4 = df3[df3['Area of Fire Origin'] != "-"].copy()

# select the date from 2003-01-01 to 2017-12-31
# 'Date' is the first ten characters of 'Incident Date'; later code parses it
# with "%Y-%m-%d", and in that format plain string comparison orders dates
# chronologically.  Both bounds are inclusive.
df4['Date'] = df4['Incident Date'].str.slice(0, 10)
# Explicit copy: later stages add many columns to df5.
df5 = df4[df4['Date'].between("2003-01-01", "2017-12-31")].copy()
# split the longitudes and latitude
# slice(7, -1) removes the first seven characters and the final character of
# each 'point' value, leaving the text in between.
# NOTE(review): presumably values look like "POINT (<lon> <lat>)" - confirm
# against the CSV.
a = df5.point.str.slice(7, -1)
longitudes = []
latitudes = []
for location in a:
    values = str(location).split()
    if len(values) == 2:
        longitude, latitude = map(float, values)
    else:
        # Anything that is not exactly two tokens (empty text, a lone "nan",
        # extra fields) is recorded as missing instead of failing on unpacking.
        longitude, latitude = None, None
    longitudes.append(longitude)
    latitudes.append(latitude)
# The lists are in row order, so they are assigned positionally.
df5['longitude'] = longitudes
df5['latitudes'] = latitudes
# Split off the leading code of Primary Situation, Action Taken Primary,
# Item First Ignited, Ignition Cause and Area of Fire Origin, and
# extract year, month, hour and day of week.
#
# Each source column is converted to a Python list exactly once; the lists are
# in row order, so they can be assigned back to df5 positionally.


def _leading_tokens(series):
    """Return the first whitespace-separated token of every value in *series*."""
    return [text.split()[0] for text in series.tolist()]


b = _leading_tokens(df5['Primary Situation'])
c = _leading_tokens(df5["Action Taken Primary"])
d = _leading_tokens(df5['Item First Ignited'])
e = _leading_tokens(df5['Ignition Cause'])
k = _leading_tokens(df5['Area of Fire Origin'])

incident_dates = df5['Date'].tolist()
alarm_stamps = df5['Alarm DtTm'].tolist()

f = [day.split('-')[0] for day in incident_dates]  # extract year
g = [day.split('-')[1] for day in incident_dates]  # extract month
# extract hour: the text between "T" and the first ":" of the alarm timestamp
h = [stamp.split("T")[1].split(":")[0] for stamp in alarm_stamps]
# extract day of week (datetime.weekday(): Monday is 0, Sunday is 6)
j = [datetime.strptime(day, "%Y-%m-%d").weekday() for day in incident_dates]

df5["Primary Situation_number"] = b
df5["Action Taken Primary_number"] = c
df5["Item First Ignited_number"] = d
df5["Ignition Cause_number"] = e
df5["Area of Fire Origin_number"] = k
df5["Year"] = f
df5["Month"] = g
df5["Hour"] = h
df5["weekofday"] = j

# Replace the non-numeric codes "UU" and "U" with the numbers 100 and 6.
# NOTE(review): replace() matches whole cell values in every column of df5,
# not only the *_number columns - confirm that is intended.
df5.replace("UU", 100, inplace=True)
df5.replace("U", 6, inplace=True)
# focus on Unintentional: keep only rows whose ignition-cause code is '2'.
# Explicit copy because a column is added to df6 below.
df6 = df5[df5['Ignition Cause_number'] == '2'].copy()

# discretization: bucket the alarm hour into six equal-width ordinal bins.
# 'Hour' holds digit strings, so convert to float before handing it to
# scikit-learn, which expects a numeric 2-D array of shape (n_samples, 1).
data = df6['Hour'].astype(float).to_numpy().reshape(-1, 1)
discretizer = KBinsDiscretizer(n_bins=6, encode='ordinal', strategy='uniform')
# fit_transform returns an (n, 1) array; flatten it to a 1-D column.
df6['disc_data_hour'] = discretizer.fit_transform(data).ravel()

# prepare data--- pick up the attributes
title = ["Area of Fire Origin_number", 'Primary Situation_number',
         'Action Taken Primary_number', 'Item First Ignited_number',
         'Ignition Cause_number', 'disc_data_hour']
# Short prefixes, in the same order as `title`.
title1 = ["AFO", 'PS', 'ATP', 'IFI', 'IC', 'Hour']
df7 = df6[title].copy()

# prepare data--- transfer the type, then make the values easy to read and
# recognise: every code becomes an integer (unparseable values become 0) and
# is prefixed with its attribute tag, e.g. "AFO" + "12" -> "AFO12", so codes
# from different attributes cannot collide.
for column, prefix in zip(title, title1):
    codes = pd.to_numeric(df7[column], errors='coerce').fillna(0).astype(int)
    df7[column] = prefix + codes.astype(str)
# apriori: mine frequent itemsets / association rules from the prefixed codes
# in df7 with two libraries (mlxtend and apyori).
#
# Both libraries export a function called `apriori`.  The mlxtend one is
# imported under an alias so the two can never be confused; the bare name
# `apriori` refers to the apyori implementation.
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori as mlxtend_apriori
from mlxtend.frequent_patterns import association_rules
from apyori import apriori

# One transaction per row of df7: the list of that row's item strings.
transactions = df7.values.tolist()

# --- mlxtend: one-hot encode the transactions, then mine itemsets and rules.
TE = TransactionEncoder()
data = TE.fit_transform(transactions)  # to bool
# NOTE: this rebinds `df`, which until now held the raw CSV contents.
df = pd.DataFrame(data, columns=TE.columns_)
items = mlxtend_apriori(df, min_support=0.2, use_colnames=True)
rules = association_rules(items, min_threshold=0.6, metric="confidence")

# --- apyori: same thresholds, operating directly on the transaction lists.
rules1 = apriori(transactions, min_support=0.2, min_confidence=0.6)
# apriori() results are consumed lazily; materialise them for reuse below.
results = list(rules1)
# scatter
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Transfer the apyori results into a DataFrame, one row per rule.
# The rows are collected first and the frame is built once: DataFrame.append
# was removed in pandas 2.0, and appending row by row copies the frame on
# every iteration.
rule_rows = [
    {
        'Support': relation_record.support,
        'Confidence': ordered_stat.confidence,
        # items_base / items_add are unordered sets; sort them so the labels
        # are the same on every run.
        'Antecedents': ', '.join(sorted(ordered_stat.items_base)),
        'Consequents': ', '.join(sorted(ordered_stat.items_add)),
    }
    for relation_record in results
    for ordered_stat in relation_record.ordered_statistics
]
# Passing `columns` keeps the expected columns even when no rule was found.
df = pd.DataFrame(rule_rows,
                  columns=['Support', 'Confidence', 'Antecedents', 'Consequents'])

# subplot
specs = [[{'type': 'scatter'}]]
fig = make_subplots(rows=1, cols=1, specs=specs,
                    subplot_titles=['Scatter Plot'])
# add scatter: support on the x axis against confidence on the y axis
scatter_trace = go.Scatter(x=df['Support'], y=df['Confidence'], mode='markers')
fig.add_trace(scatter_trace, row=1, col=1)
fig.update_layout(title='Apriori Analysis',
                  xaxis_title='Support', yaxis_title='Confidence')
fig.show()

# bar chart
specs = [[{'type': 'bar'}]]
fig = make_subplots(rows=1, cols=1, specs=specs,
                    subplot_titles=['Bar Chart'])
# add the bar chart: support of each rule, labelled by its antecedents
bar_trace = go.Bar(x=df['Antecedents'], y=df['Support'])
fig.add_trace(bar_trace, row=1, col=1)
fig.update_layout(title='Apriori Analysis',
                  xaxis_title='Antecedents', yaxis_title='Support')
fig.show()